package com.yzq.os.spider.v.service.job;

import java.util.Date;
import java.util.List;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.quartz.JobExecutionContext;
import org.quartz.JobExecutionException;
import org.quartz.StatefulJob;
import org.springframework.scheduling.quartz.QuartzJobBean;

import com.yzq.os.spider.v.Constants;
import com.yzq.os.spider.v.domain.SearchEngine;
import com.yzq.os.spider.v.service.domain.QueryURLService;
import com.yzq.os.spider.v.service.domain.SearchEngineService;
import com.yzq.os.spider.v.service.domain.SpiderRecordService;
import com.yzq.os.spider.v.service.http.HttpClientService;

/**
 * Performs the cleanup and initialization steps that must run before a crawl
 * job starts. Typically scheduled to execute right before the crawl job itself.
 * Implements {@link StatefulJob} so Quartz never runs two instances concurrently.
 *
 * @author 苑志强(xingyu_yzq@163.com)
 *
 */
public class CleanJobDetail extends QuartzJobBean implements StatefulJob {

	/**
	 * Job data map key holding the IDs of search engines that should be
	 * skipped during cleanup/initialization; multiple IDs may be separated
	 * by commas.
	 */
	public static final String EXCLUDE_CLEAN_ENGINEIDS_KEY = "exclude_clean_engineids_key";

	private static final Logger logger = Logger.getLogger(CleanJobDetail.class);

	/**
	 * For every known search engine (except those excluded via
	 * {@link #EXCLUDE_CLEAN_ENGINEIDS_KEY}): re-initializes its query-URL
	 * running table from the backup table and pre-creates today's crawl-record
	 * save table. A failure for one engine is logged and does not stop
	 * processing of the remaining engines.
	 *
	 * @param ctx Quartz execution context; its job data map may carry the
	 *            comma-separated exclusion list
	 * @throws JobExecutionException declared by the Quartz contract (not
	 *            thrown here; per-engine errors are caught and logged)
	 */
	@Override
	protected void executeInternal(JobExecutionContext ctx) throws JobExecutionException {
		// Parse the optional comma-separated exclusion list from the job data map.
		String excludeCleanEngineIdsStr = ctx.getJobDetail().getJobDataMap().getString(EXCLUDE_CLEAN_ENGINEIDS_KEY);
		String[] excludeCleanEngineIds = null;
		if (StringUtils.isNotBlank(excludeCleanEngineIdsStr)) {
			excludeCleanEngineIds = StringUtils.split(excludeCleanEngineIdsStr, ",");
		}
		// Resolve collaborating services from the shared application context.
		SearchEngineService searchEngineService = Constants.getApplicationContext().getBean("searchEngineService", SearchEngineService.class);
		QueryURLService queryURLService = Constants.getApplicationContext().getBean("queryURLService", QueryURLService.class);
		SpiderRecordService crawlJobService = Constants.getApplicationContext().getBean("crawlJobService", SpiderRecordService.class);
		HttpClientService httpClientService = Constants.getApplicationContext().getBean("httpClientService", HttpClientService.class);
		Date date = new Date();
		List<SearchEngine> engines = searchEngineService.findAllList();
		for (SearchEngine engine : engines) {
			try {
				int searchEngineId = engine.getId();
				if (isExcluded(excludeCleanEngineIds, searchEngineId)) {
					logger.info("Skip search engine id:[" + searchEngineId + "] clean and init query url and create next save job table.");
					continue;
				}
				initQueryUrlsForNext(searchEngineId, httpClientService, queryURLService);
				preCreateJobSaveTableForNextRun(searchEngineId, date, crawlJobService);
			} catch (Exception e) {
				// One engine failing must not abort cleanup for the rest.
				// Message fixed: was "client search engine error" — this job cleans.
				logger.error("clean search engine error.[" + engine.getId() + "]", e);
			}
		}
	}

	/**
	 * Returns {@code true} when the given engine ID appears in the exclusion list.
	 *
	 * @param excludeCleanEngineIds exclusion list; {@code null} means nothing is excluded
	 * @param searchEngineId engine ID to test
	 * @return whether cleanup should be skipped for this engine
	 */
	private boolean isExcluded(String[] excludeCleanEngineIds, int searchEngineId) {
		return excludeCleanEngineIds != null
				&& ArrayUtils.contains(excludeCleanEngineIds, String.valueOf(searchEngineId));
	}

	/**
	 * Synchronizes query URLs from the backup table into the running table,
	 * preparing the engine for the upcoming crawl run: truncates the running
	 * table, re-populates it from the valid backup URLs, and logs before/after
	 * row counts.
	 *
	 * @param searchEngineId engine whose query-URL running table is reset
	 * @param httpClientService currently unused by this method — TODO confirm
	 *            whether the parameter can be dropped
	 * @param queryURLService service performing the truncate/init/count operations
	 * @throws Exception propagated from the underlying service calls
	 */
	private void initQueryUrlsForNext(int searchEngineId, HttpClientService httpClientService, QueryURLService queryURLService) throws Exception {
		int beforeCount = queryURLService.count(searchEngineId);
		queryURLService.truncate(searchEngineId);
		queryURLService.initValidBakQueryUrls(searchEngineId);
		int afterCount = queryURLService.count(searchEngineId);
		logger.info("Done truncate and initialize from back for engine:[" + searchEngineId + "],beforeCount:[" + beforeCount + "],afterCount:[" + afterCount + "]");
	}

	/**
	 * Ensures the table used to store today's crawl records exists, creating
	 * it if necessary.
	 *
	 * @param searchEngineId engine the save table belongs to
	 * @param date date used to derive the per-day table name
	 * @param crawlJobService service that derives the name and creates the table
	 */
	private void preCreateJobSaveTableForNextRun(int searchEngineId, Date date, SpiderRecordService crawlJobService) {
		String jobSaveTableName = SpiderRecordService.parseTableName(searchEngineId, date);
		crawlJobService.makeSureExistTable(jobSaveTableName);
		logger.info("Make sure job save table exist tableName:[" + jobSaveTableName + "]");
	}

}
